#covidDataset2019.py
#script to organize the USPTO JSON data into smaller individual month files
#written by Nick Shine
#November 2020

import json
import sys
import csv
import pandas as pd
import gc

#number of JSON files the main loop reads, one file per pass
#this number changes depending on the year, which contains a differing amount of data
numberOfYearsOfJSONdata = 39
#name of the first .json file to open; the main loop rebinds this to each following file name
pedsJSONfile = '1900.json'

#running total of applications seen across every file, used as a count check
numberOfAppsTotal = 0

#one dictionary per calendar month, selected by the month digits of the application status date
#each maps an integer index to a record with 'Month', 'App Id', 'Status' and 'ArtUnit' keys
appInfoJan = {}
appInfoFeb = {}
appInfoMar = {}
appInfoApr = {}
appInfoMay = {}
appInfoJun = {}
appInfoJul = {}
appInfoAug = {}
appInfoSep = {}
appInfoOct = {}
appInfoNov = {}
appInfoDec = {}

#counters for applications whose metadata has no 'applicationStatusCategory' / 'groupArtUnitNumber' entry
#already done in previous code step but a double check
missingAppStatusCount = 0
missingGroupArtUnitCount = 0

#dictionaries to hold tech center data
#NOTE(review): nothing in the visible code fills or reads these - confirm whether they are still needed
techCenter1600 = {}
techCenter1700 = {}
techCenter2100 = {}
techCenter2400 = {}
techCenter2600 = {}
techCenter2800 = {}
techCenter2900 = {}
techCenter3600 = {}
techCenter3700 = {}
techCenter3900 = {}
techCenterOther = {}

#next unused integer key for each month dictionary above; incremented after every insert
janAccessIndex = 0
febAccessIndex = 0
marAccessIndex = 0
aprAccessIndex = 0
mayAccessIndex = 0
junAccessIndex = 0
julAccessIndex = 0
augAccessIndex = 0
sepAccessIndex = 0
octAccessIndex = 0
novAccessIndex = 0
decAccessIndex = 0

#app status messages
#NOTE(review): none of these strings is referenced in the visible code; presumably they mirror
#the values stored under 'Status' - confirm before relying on them
#the doubled spaces around '--' are part of the literal text
abandonedReason1 = 'Abandoned  --  Failure to Respond to an Office Action'
abandonedReason2 = 'Expressly Abandoned  --  During Examination'
abandonedReason3 = 'Abandoned  --  Incomplete Application (Pre-examination)'
abandonedReason4 = 'Abandoned  --  Failure to Pay Issue Fee'
abandonedReason5 = 'Abandonment for Failure to Correct Drawings/Oath/NonPub Request'
abandonedReason6 = 'Expressly Abandoned  --  During Publication Process'
abandonedReason7 = 'Abandoned  --  After Examiner\'s Answer or Board of Appeals Decision'

withdrawnAbandonment = 'Withdrawn Abandonment, awaiting examiner action'

patentedCase = 'Patented Case'

patentExpiredNoFees = 'Patent Expired Due to NonPayment of Maintenance Fees Under 37 CFR 1.362'

docketedNewCase = 'Docketed New Case - Ready for Examination'

nfoaMailed = 'Non Final Action Mailed'

responseToNfoa = 'Response to Non-Final Office Action Entered and Forwarded to Examiner'

finalRejecMailed = 'Final Rejection Mailed'

responseToFinalAction = 'Response after Final Action Forwarded to Examiner'

#loop to pull out the selected data and organize the data into months
#
#each pass loads the file named by pedsJSONfile, stores every application that
#has a status under the month of its status date, prints the running counts
#and then moves pedsJSONfile on to the next file name

#two-digit month of the status date -> (month label, dictionary for that month)
monthBuckets = {
	'01' : ('Jan', appInfoJan),
	'02' : ('Feb', appInfoFeb),
	'03' : ('Mar', appInfoMar),
	'04' : ('Apr', appInfoApr),
	'05' : ('May', appInfoMay),
	'06' : ('Jun', appInfoJun),
	'07' : ('Jul', appInfoJul),
	'08' : ('Aug', appInfoAug),
	'09' : ('Sep', appInfoSep),
	'10' : ('Oct', appInfoOct),
	'11' : ('Nov', appInfoNov),
	'12' : ('Dec', appInfoDec),
}

#the first files are not one year apart, so their successors are listed explicitly
#any other file is followed by the file for the next calendar year
irregularNextFile = {
	'1900.json' : '1976.json',
	'1976.json' : '1981.json',
	'1981.json' : '1984.json',
	'1984.json' : '1986.json',
}

for x in range(numberOfYearsOfJSONdata):

	with open(pedsJSONfile, 'r', encoding='utf-8') as json_file:
		data = json.load(json_file)

	#take the count from the list itself instead of the last loop index, so an
	#empty file reports 0 rather than raising NameError or reusing a stale index
	totalNumberOfAppsCount = len(data['PatentData'])

	for patentRecord in data['PatentData']:

		caseMetadata = patentRecord['patentCaseMetadata']

		loopAppId = caseMetadata['applicationNumberText']['value']

		#'NULL' marks an application that has no art unit entry
		loopArtUnitNumber = 'NULL'
		if 'groupArtUnitNumber' in caseMetadata:
			loopArtUnitNumber = caseMetadata['groupArtUnitNumber']['value']
		else:
			missingGroupArtUnitCount += 1

		#applications without a status are only counted, never stored
		if 'applicationStatusCategory' not in caseMetadata:
			missingAppStatusCount += 1
			continue

		loopAppStatus = caseMetadata['applicationStatusCategory']
		#if the status date is absent fall back to 'NULL', which matches no month,
		#so the record is skipped instead of stopping the run with a KeyError
		loopStatusDate = caseMetadata.get('applicationStatusDate', 'NULL')

		#characters 5-6 of the date string are compared with the two-digit month codes
		monthBucket = monthBuckets.get(loopStatusDate[5:7])
		if monthBucket is None:
			continue

		monthLabel, monthApps = monthBucket
		#keys run from 0 upwards without gaps, so the current size is the next free index
		monthApps[len(monthApps)] = {'Month' : monthLabel, 'App Id' : loopAppId, 'Status' : loopAppStatus, 'ArtUnit' : loopArtUnitNumber}

	#clears the data variable holding the json data, which gets very large
	data.clear()
	#clears all unused variables in case Python hasn't done it yet, the memory requirements are very large
	gc.collect()

	print('The total number of apps with missing status is ', missingAppStatusCount)
	print('The total number of apps with missing art unit is ', missingGroupArtUnitCount)
	print('The total number of apps for', pedsJSONfile[0:4], ' is', totalNumberOfAppsCount, '\n')

	numberOfAppsTotal += totalNumberOfAppsCount

	#this code changes the string to switch between the different years of JSON data from the USPTO
	if pedsJSONfile in irregularNextFile:
		pedsJSONfile = irregularNextFile[pedsJSONfile]
	else:
		#plain integer arithmetic on the four year digits of the file name
		pedsJSONfile = str(int(pedsJSONfile[0:4]) + 1) + '.json'

	#announce the next file only when another pass will actually read it
	#(with 39 files this skips exactly the 2021 announcement)
	if x + 1 < numberOfYearsOfJSONdata:
		print('Year', pedsJSONfile[0:4], '\n')

#keep the per-month index counters in step with the dictionaries they describe
janAccessIndex = len(appInfoJan)
febAccessIndex = len(appInfoFeb)
marAccessIndex = len(appInfoMar)
aprAccessIndex = len(appInfoApr)
mayAccessIndex = len(appInfoMay)
junAccessIndex = len(appInfoJun)
julAccessIndex = len(appInfoJul)
augAccessIndex = len(appInfoAug)
sepAccessIndex = len(appInfoSep)
octAccessIndex = len(appInfoOct)
novAccessIndex = len(appInfoNov)
decAccessIndex = len(appInfoDec)


#final report: the grand total first, then for every month the number of stored
#records, then the value of every month's index counter
print('The total overall processed is', numberOfAppsTotal, '\n')

for monthName, monthApps in (
	('Jan', appInfoJan),
	('Feb', appInfoFeb),
	('Mar', appInfoMar),
	('Apr', appInfoApr),
	('May', appInfoMay),
	('Jun', appInfoJun),
	('Jul', appInfoJul),
	('Aug', appInfoAug),
	('Sep', appInfoSep),
	('Oct', appInfoOct),
	('Nov', appInfoNov),
	('Dec', appInfoDec),
):
	print(monthName + ' total is:', len(monthApps), '\n')

for monthName, nextIndex in (
	('jan', janAccessIndex),
	('feb', febAccessIndex),
	('mar', marAccessIndex),
	('apr', aprAccessIndex),
	('may', mayAccessIndex),
	('jun', junAccessIndex),
	('jul', julAccessIndex),
	('aug', augAccessIndex),
	('sep', sepAccessIndex),
	('oct', octAccessIndex),
	('nov', novAccessIndex),
	('dec', decAccessIndex),
):
	print(monthName + ' index count is', nextIndex, '\n')

#save the organized data to file for backup and checking
#each month dictionary is written as a single JSON document; json.dumps writes
#the integer index keys as strings
monthOutputFiles = (
	('Jan2019.json', appInfoJan),
	('Feb2019.json', appInfoFeb),
	('Mar2019.json', appInfoMar),
	('Apr2019.json', appInfoApr),
	('May2019.json', appInfoMay),
	('Jun2019.json', appInfoJun),
	('Jul2019.json', appInfoJul),
	('Aug2019.json', appInfoAug),
	('Sep2019.json', appInfoSep),
	('Oct2019.json', appInfoOct),
	('Nov2019.json', appInfoNov),
	('Dec2019.json', appInfoDec),
)

for monthFileName, monthApps in monthOutputFiles:
	#the with block closes the file even if serializing or writing fails
	with open(monthFileName, 'w', encoding='utf-8') as monthFile:
		monthFile.write(json.dumps(monthApps))



